{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "iris = load_iris()\n",
    "\n",
    "X = iris.data\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
       "            max_features=None, max_leaf_nodes=None,\n",
       "            min_impurity_split=1e-07, min_samples_leaf=1,\n",
       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "            presort=False, random_state=None, splitter='best')"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.33, random_state = 123)\n",
    "clf = DecisionTreeClassifier()\n",
    "clf.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50,)"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#train_X.shape\n",
    "#train_y.shape\n",
    "#test_X.shape\n",
    "#test_y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.94\n",
      "1.0\n",
      "[[20  0  0]\n",
      " [ 0 11  0]\n",
      " [ 0  3 16]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "predicted = clf.predict(test_X)\n",
    "print(accuracy_score(test_y, predicted))\n",
    "\n",
    "predicted2 = clf.predict(train_X)\n",
    "print(accuracy_score(train_y, predicted2))\n",
    "\n",
    "\n",
    "from sklearn.metrics import confusion_matrix\n",
    "m = confusion_matrix(test_y, predicted)\n",
    "print(m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.94000000000000006"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "acc = []\n",
    "kf = KFold(n_splits=10)\n",
    "for train, test in kf.split(X):\n",
    "    #print(train, test)\n",
    "    train_X, test_X, train_y, test_y = X[train],X[test], y[train], y[test]\n",
    "    clf = DecisionTreeClassifier()\n",
    "    clf.fit(train_X, train_y)\n",
    "    predicted = clf.predict(test_X)\n",
    "    acc.append(accuracy_score(test_y, predicted))\n",
    "sum(acc) / len(acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1.        ,  0.93333333,  1.        ,  0.93333333,  0.93333333,\n",
       "        0.86666667,  0.93333333,  1.        ,  1.        ,  1.        ])"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "acc = cross_val_score(clf, X=iris.data, y= iris.target, cv= 10)\n",
    "acc\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.95999999999999996"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "acc.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.044221663871405324"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "acc.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(150, 4)"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9533333333333334"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import LeaveOneOut\n",
    "\n",
    "res = []\n",
    "loo = LeaveOneOut()\n",
    "for train, test in loo.split(X):\n",
    "    train_X, test_X, train_y, test_y = X[train],X[test], y[train], y[test]\n",
    "    clf = DecisionTreeClassifier()\n",
    "    clf.fit(train_X, train_y)\n",
    "    predicted = clf.predict(test_X)\n",
    "    res.extend((predicted == test_y).tolist())\n",
    "#res\n",
    "sum(res) / 150"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
